Cross conference group stage vs divided conference group stage comparison

Steps:

+ randomly assign each team a quality score, drawn from a normal distribution (mean 100, standard deviation 30)
+ have the teams in each conference play a home-and-away round robin, where each side's chance of victory is proportional to its quality relative to its opponent's
+ select the top 8 teams in each conference and divide them into 4 levels of seeds (pots)
+ perform two draws:
  + one cross-conference
  + one where the conferences are drawn into groups separately
+ measure the quality of each team's group opponents in each scenario (summed over the group), and record the seed and opponent quality for both draw types
+ repeat the above 1000 times (see part 2)

Part 1: single run example

assign team qualities, run regular season

import pandas as pd
pd.options.mode.chained_assignment = None
import numpy as np
import string
import statistics as stats
# 18 teams per conference, named east_A..east_R and west_A..west_R
east_teams = [f'east_{letter}' for letter in string.ascii_uppercase[:18]]
west_teams = [f'west_{letter}' for letter in string.ascii_uppercase[:18]]
def assign_qualities(teams):
  """Return a dict mapping each team name to a random quality score.

  One score per team is drawn from a normal distribution with mean 100
  and standard deviation 30, in the order the teams are given.
  """
  n_teams = len(teams)
  scores = np.random.normal(loc=100, scale=30, size=n_teams)
  return {name: score for name, score in zip(teams, scores)}

# draw a separate set of quality scores for each conference (east first, then west)
east_qualities = assign_qualities(teams=east_teams)
west_qualities = assign_qualities(teams=west_teams)
def round_robin(teams,team_qualities):
  """Simulate a home-and-away round robin and return one row per game.

  Every ordered pair of distinct teams plays once, so each pairing meets
  twice (each side is listed as ``home`` once).

  Args:
    teams: iterable of team names.
    team_qualities: dict mapping team name to a numeric quality score.

  Returns:
    DataFrame with columns ``home``, ``away``, ``home_points`` and
    ``away_points`` (3 for a win, 1 each for a draw, 0 for a loss), with a
    unique 0..n-1 index. Empty (same columns) when fewer than two distinct
    teams are given.
  """
  # outcome number -> (home points, away points): 0=home win, 1=draw, 2=away win
  points_dict = {0: (3, 0), 1: (1, 1), 2: (0, 3)}
  columns = ['home', 'away', 'home_points', 'away_points']
  games = []
  for team1 in teams:
    for team2 in teams:
      if team1 == team2:
        continue
      # floor qualities at 10 so zero/negative scores still give valid weights
      team1_qual = max(team_qualities[team1], 10)
      team2_qual = max(team_qualities[team2], 10)
      # the draw weight is a random fraction of the two teams' mean quality
      draw_weight = stats.mean([team1_qual, team2_qual]) * np.random.uniform(0, 1)
      weights = [team1_qual, draw_weight, team2_qual]
      total = sum(weights)
      probs = [w / total for w in weights]
      outcome = np.random.choice([0, 1, 2], size=1, replace=False, p=probs)[0]
      home_points, away_points = points_dict[outcome]
      games.append((team1, team2, home_points, away_points))
  # build the frame once instead of concatenating one single-row frame per game;
  # this also avoids pd.concat failing on an empty list when no games are played
  frame = pd.DataFrame(games, columns=columns)
  return frame.astype({'home_points': 'int64', 'away_points': 'int64'})
# play out the regular season in each conference (east first, then west)
east_results = round_robin(teams=east_teams, team_qualities=east_qualities)
west_results = round_robin(teams=west_teams, team_qualities=west_qualities)

def standings(teams,results,qualities,label):
  """Build a league table from game results.

  Home and away points are summed per team, joined to the team quality
  scores, and sorted by total points (highest first). The top eight places
  are assigned to pots 1-4, two places per pot; lower places get no pot.
  ``teams`` is accepted but not used. ``label`` is written to every row.

  Returns a frame with columns league_place, team, quality, total_points,
  pot and label.
  """
  # places 1-8 map to pot1, pot1, pot2, pot2, pot3, pot3, pot4, pot4
  pot_names = [f'pot{k}' for k in (1, 2, 3, 4) for _ in range(2)]
  pot_lookup = pd.DataFrame({'league_place': range(1, 9), 'pot': pot_names})

  # points collected as the home side and as the away side
  home_sum = results.groupby(['home'])['home_points'].sum().reset_index()
  away_sum = results.groupby(['away'])['away_points'].sum().reset_index()
  totals = pd.merge(home_sum, away_sum, left_on='home', right_on='away')
  totals['total_points'] = totals['home_points'] + totals['away_points']

  # one row per team with its quality score
  team_quality = pd.DataFrame.from_dict(qualities, orient='index', columns=['quality'])
  team_quality = team_quality.rename_axis('team').reset_index()

  table = team_quality.merge(totals, left_on='team', right_on='home')
  table = table.sort_values(by='total_points', ascending=False)
  table['league_place'] = range(1, len(table) + 1)
  table['label'] = label

  wanted = ['league_place', 'team', 'quality', 'total_points', 'pot', 'label']
  return table.merge(pot_lookup, how='left', on='league_place')[wanted]

# league tables (with pots for the top eight) for each conference
east_standings = standings(teams=east_teams, results=east_results,
                           qualities=east_qualities, label='east')
west_standings = standings(teams=west_teams, results=west_results,
                           qualities=west_qualities, label='west')
def draw_groups(standings,ngroups,label):
  """Draw the seeded teams into groups and score each team's opponents.

  Args:
    standings: a standings frame, or a list/tuple of standings frames that
      are stacked before the draw. Must have ``pot`` and ``quality`` columns.
    ngroups: number of groups to draw into.
    label: value written to the ``label`` column of the result (replacing
      any label carried over from the standings).

  Returns:
    One row per seeded team (rows whose ``pot`` is missing are dropped) with
    added columns ``drawn`` (random sort key), ``group``, ``group_quality``
    (sum of quality in the group) and ``opponent_quality`` (group quality
    minus the team's own quality). The pre-draw index is kept in an
    ``index`` column.
  """
  if isinstance(standings, (list, tuple)):
    combined = pd.concat(standings)
  else:
    combined = standings
  # copy so the new columns are added to our own frame, not to a slice of the
  # caller's data (no reliance on the chained-assignment warning being disabled)
  seeded = combined[combined['pot'].notna()].copy()
  # random sort key: shuffles teams within each pot
  seeded['drawn'] = np.random.uniform(size=seeded.shape[0])
  seeded = seeded.sort_values(['pot','drawn']).reset_index()
  # deal teams out in pot order, cycling through the groups
  seeded['group'] = ['group'+str(x%ngroups) for x in seeded.index+1]
  # calculate quality of each group
  group_quality = (
    seeded.groupby(['group'])['quality'].sum()
    .reset_index()
    .rename(columns={'quality':'group_quality'})
  )
  seeded = seeded.merge(group_quality,on='group')
  # everyone else in the group = group total minus the team itself
  seeded['opponent_quality'] = seeded['group_quality']-seeded['quality']
  seeded['label'] = label
  return seeded
# one cross-conference draw into 4 groups, then one 2-group draw per conference
east_west_draw = draw_groups(standings=[east_standings, west_standings],
                             ngroups=4, label='combined')
east_draw = draw_groups(standings=east_standings, ngroups=2, label='separate-east')
west_draw = draw_groups(standings=west_standings, ngroups=2, label='separate-west')
# Single run: summed quality of each team's group opponents against its
# conference finishing place, one colour per draw type.
bind_rows(py$east_west_draw, py$east_draw, py$west_draw) %>%
  # both per-conference labels contain 'separate'; fold them into one draw type
  mutate(draw_type = if_else(grepl('separate', label), 'separate', label)) %>%
  ggplot(aes(x = league_place, y = opponent_quality, color = draw_type)) +
  geom_jitter(width = 0.2, height = 0) +
  geom_smooth(method = 'lm') +
  labs(title = paste0('single_simulation_run'),
       x = 'conference finish place',
       y = 'sum of group stage opponents quality scores') +
  theme(legend.position = "bottom")
## `geom_smooth()` using formula 'y ~ x'

# Single run: table place against team quality, coloured by conference.
bind_rows(py$east_standings,py$west_standings)%>%
  ggplot(aes(x=quality,y=league_place,color=label))+
  geom_point()+
  labs(x="team quality",y="table place",color="conference",
       title="Team Quality vs League Table Place")+
  theme(legend.position='bottom')+
  # select the ColorBrewer 'Dark2' palette through the brewer scale, as the
  # multi-simulation standings plots do; calling palette('dark2') inside
  # scale_color_discrete() invokes base R's palette() rather than naming a
  # ggplot2 palette
  scale_color_brewer(palette='Dark2')

# Rank the teams inside each drawn group by quality (1 = strongest).
group_rank <- bind_rows(py$east_west_draw, py$east_draw, py$west_draw) %>%
  arrange(group, desc(quality)) %>%
  group_by(label, group) %>%
  mutate(group_qual_rank = row_number()) %>%
  ungroup() %>%
  transmute(label,
            top_opponent = team,
            group,
            top_quality = quality,
            group_qual_rank)

# For every team, find its strongest group opponent.
with_top <- bind_rows(py$east_west_draw, py$east_draw, py$west_draw) %>%
  # pair each team with every member of its own group
  left_join(group_rank, by = c('label', 'group')) %>%
  # a team cannot be its own opponent
  filter(team != top_opponent) %>%
  # keep only the best-ranked remaining opponent
  group_by(team, label, group) %>%
  slice_min(group_qual_rank, n = 1, with_ties = FALSE) %>%
  ungroup()
# Single run: quality of the strongest group opponent faced by the top four
# finishers of each conference, one colour per draw type.
with_top%>%
  # both per-conference labels contain 'separate'; fold them into one draw type
  mutate(draw_type = case_when(grepl('separate',label)~'separate',
                               TRUE~label))%>%
  filter(league_place<=4)%>%
  # x is the numeric finishing place so that it matches the axis label and the
  # multi-simulation versions of this plot, and so the linear smoother has a
  # continuous x to fit against (pot is a discrete label)
  ggplot(aes(x=league_place,y=top_quality,color=draw_type))+
  geom_point(position = position_jitter(w = 0.2, h = 0))+
  geom_smooth(method='lm')+
  theme(legend.position="bottom")+
  labs(y='top opponent quality score',
       x='conference finish place',
       title=paste0('single_simulation_run'))+
  scale_color_viridis_d(option = 'inferno')
## `geom_smooth()` using formula 'y ~ x'

# Part 2: repeat the season + draw simulation nsim times (linear quality weighting)
nsim = 1000
standings_list = []
groups_list = []
# create 18 teams in each conference
east_teams = ['east_'+x for x in string.ascii_uppercase[0:18]]
west_teams = ['west_'+x for x in string.ascii_uppercase[0:18]]
for sim in range(nsim):
  # assign qualities to teams
  east_qualities = assign_qualities(east_teams)
  west_qualities = assign_qualities(west_teams)
  # simulate round-robin league play
  east_results = round_robin(east_teams,east_qualities)
  west_results = round_robin(west_teams,west_qualities)
  # find standings
  east_standings = standings(east_teams,east_results,east_qualities,'east')
  west_standings = standings(west_teams,west_results,west_qualities,'west')
  # simulate draws: one cross-conference draw and one draw per conference
  east_west_draw = draw_groups([east_standings,west_standings],4,'combined')
  east_draw = draw_groups(east_standings,2,'separate-east')
  west_draw = draw_groups(west_standings,2,'separate-west')
  # tag every row with the simulation number (stored as a string)
  standings_temp = pd.concat([east_standings,west_standings])
  standings_temp['sim'] = str(sim)
  standings_list.append(standings_temp)
  groups_temp = pd.concat([east_west_draw,east_draw,west_draw])
  groups_temp['sim'] = str(sim)
  groups_list.append(groups_temp)

# ignore_index gives the stacked frames a unique 0..n-1 index; with repeated
# index labels the R side warns "index contains duplicated values: row names
# not set" every time py$sim_standings / py$sim_groups is converted
sim_standings = pd.concat(standings_list, ignore_index=True)
sim_groups = pd.concat(groups_list, ignore_index=True)
# All simulations (linear weighting): summed quality of each team's group
# opponents against its conference finishing place, one colour per draw type.
py$sim_groups %>%
  # both per-conference labels contain 'separate'; fold them into one draw type
  mutate(draw_type = if_else(grepl('separate', label), 'separate', label)) %>%
  ggplot(aes(x = league_place, y = opponent_quality, color = draw_type)) +
  geom_jitter(width = 0.1, height = 0, alpha = .1) +
  geom_smooth(method = 'lm') +
  labs(title = paste0(py$nsim, ' simulations results - total group opponent quality\nlinear quality matchup weighting'),
       x = 'conference finish place',
       y = 'sum of group stage opponents quality scores') +
  theme(legend.position = "bottom")
## Warning in py_to_r.pandas.core.frame.DataFrame(x): index contains duplicated
## values: row names not set
## `geom_smooth()` using formula 'y ~ x'

# All simulations (linear weighting): table place against team quality,
# coloured by conference; points are jittered vertically only.
py$sim_standings %>%
  ggplot(aes(x = quality, y = league_place, color = label)) +
  geom_jitter(width = 0, height = .1, alpha = .1) +
  scale_color_brewer(palette = 'Dark2') +
  labs(title = paste0("Team Quality vs League Table Place: ", py$nsim, ' simulations\nlinear quality matchup weighting'),
       x = "team quality",
       y = "table place",
       color = "conference") +
  theme(legend.position = 'bottom')
## Warning in py_to_r.pandas.core.frame.DataFrame(x): index contains duplicated
## values: row names not set

# Rank the teams inside each drawn group of each simulation by quality
# (1 = strongest).
group_rank <- py$sim_groups %>%
  arrange(sim, label, group, desc(quality)) %>%
  group_by(sim, label, group) %>%
  mutate(group_qual_rank = row_number()) %>%
  ungroup() %>%
  transmute(label,
            sim,
            top_opponent = team,
            group,
            top_quality = quality,
            group_qual_rank)
## Warning in py_to_r.pandas.core.frame.DataFrame(x): index contains duplicated
## values: row names not set
# For every team in every simulation, find its strongest group opponent.
with_top <- py$sim_groups %>%
  # pair each team with every member of its own group
  left_join(group_rank, by = c('sim', 'label', 'group')) %>%
  # a team cannot be its own opponent
  filter(team != top_opponent) %>%
  # keep only the best-ranked remaining opponent for each team
  group_by(team, sim, label, group) %>%
  slice_min(group_qual_rank, n = 1, with_ties = FALSE) %>%
  ungroup()
## Warning in py_to_r.pandas.core.frame.DataFrame(x): index contains duplicated
## values: row names not set
# All simulations (linear weighting): quality of the strongest group opponent
# faced by the top four finishers of each conference, one colour per draw type.
with_top %>%
  # both per-conference labels contain 'separate'; fold them into one draw type
  mutate(draw_type = if_else(grepl('separate', label), 'separate', label)) %>%
  filter(league_place <= 4) %>%
  ggplot(aes(x = league_place, y = top_quality, color = draw_type)) +
  geom_jitter(width = 0.2, height = 0, alpha = .2) +
  geom_smooth(method = 'lm') +
  scale_color_viridis_d(option = 'inferno') +
  labs(title = paste0(py$nsim, ' simulations results - top group opponent quality for pots 1 and 2\nlinear quality matchup weighting'),
       x = 'conference finish place',
       y = 'top opponent quality score') +
  theme(legend.position = "bottom")
## `geom_smooth()` using formula 'y ~ x'

# Mean summed opponent quality for each finishing place and draw label.
py$sim_groups %>%
  group_by(league_place, label) %>%
  summarise(average_opponent_quality = mean(opponent_quality))
## Warning in py_to_r.pandas.core.frame.DataFrame(x): index contains duplicated
## values: row names not set
## `summarise()` has grouped output by 'league_place'. You can override using the
## `.groups` argument.
## # A tibble: 24 × 3
## # Groups:   league_place [8]
##    league_place label         average_opponent_quality
##           <dbl> <chr>                            <dbl>
##  1            1 combined                          337.
##  2            1 separate-east                     337.
##  3            1 separate-west                     336.
##  4            2 combined                          336.
##  5            2 separate-east                     338.
##  6            2 separate-west                     336.
##  7            3 combined                          345.
##  8            3 separate-east                     346.
##  9            3 separate-west                     346.
## 10            4 combined                          346.
## # … with 14 more rows
def round_robin_cu(teams,team_qualities):
  """Simulate a home-and-away round robin using cubed quality as the weight.

  Same scheme as the linear version, except each team's weight is its
  quality cubed (floored at 10), which favours the stronger side more.

  Args:
    teams: iterable of team names.
    team_qualities: dict mapping team name to a numeric quality score.

  Returns:
    DataFrame with columns ``home``, ``away``, ``home_points`` and
    ``away_points`` (3 for a win, 1 each for a draw, 0 for a loss), with a
    unique 0..n-1 index. Empty (same columns) when fewer than two distinct
    teams are given.
  """
  # outcome number -> (home points, away points): 0=home win, 1=draw, 2=away win
  points_dict = {0: (3, 0), 1: (1, 1), 2: (0, 3)}
  columns = ['home', 'away', 'home_points', 'away_points']
  games = []
  for team1 in teams:
    for team2 in teams:
      if team1 == team2:
        continue
      # floor the cubed qualities at 10 so zero/negative scores still give valid weights
      team1_qual = max(team_qualities[team1]**3, 10)
      team2_qual = max(team_qualities[team2]**3, 10)
      # the draw weight is a random fraction of the two teams' mean weight
      draw_weight = stats.mean([team1_qual, team2_qual]) * np.random.uniform(0, 1)
      weights = [team1_qual, draw_weight, team2_qual]
      total = sum(weights)
      probs = [w / total for w in weights]
      outcome = np.random.choice([0, 1, 2], size=1, replace=False, p=probs)[0]
      home_points, away_points = points_dict[outcome]
      games.append((team1, team2, home_points, away_points))
  # build the frame once instead of concatenating one single-row frame per game;
  # this also avoids pd.concat failing on an empty list when no games are played
  frame = pd.DataFrame(games, columns=columns)
  return frame.astype({'home_points': 'int64', 'away_points': 'int64'})

# repeat the season + draw simulation nsim times (cubed quality weighting)
nsim = 1000
standings_list_cu = []
groups_list_cu = []
# create 18 teams in each conference
east_teams = ['east_'+x for x in string.ascii_uppercase[0:18]]
west_teams = ['west_'+x for x in string.ascii_uppercase[0:18]]
for sim in range(nsim):
  # assign qualities to teams
  east_qualities = assign_qualities(east_teams)
  west_qualities = assign_qualities(west_teams)
  # simulate round-robin league play
  east_results = round_robin_cu(east_teams,east_qualities)
  west_results = round_robin_cu(west_teams,west_qualities)
  # find standings
  east_standings = standings(east_teams,east_results,east_qualities,'east')
  west_standings = standings(west_teams,west_results,west_qualities,'west')
  # simulate draws: one cross-conference draw and one draw per conference
  east_west_draw = draw_groups([east_standings,west_standings],4,'combined')
  east_draw = draw_groups(east_standings,2,'separate-east')
  west_draw = draw_groups(west_standings,2,'separate-west')
  # tag every row with the simulation number (stored as a string)
  standings_temp = pd.concat([east_standings,west_standings])
  standings_temp['sim'] = str(sim)
  standings_list_cu.append(standings_temp)
  groups_temp = pd.concat([east_west_draw,east_draw,west_draw])
  groups_temp['sim'] = str(sim)
  groups_list_cu.append(groups_temp)

# ignore_index gives the stacked frames a unique 0..n-1 index; with repeated
# index labels the R side warns "index contains duplicated values: row names
# not set" every time py$sim_standings_cu / py$sim_groups_cu is converted
sim_standings_cu = pd.concat(standings_list_cu, ignore_index=True)
sim_groups_cu = pd.concat(groups_list_cu, ignore_index=True)
# All simulations (cubed weighting): summed quality of each team's group
# opponents against its conference finishing place, one colour per draw type.
py$sim_groups_cu %>%
  # both per-conference labels contain 'separate'; fold them into one draw type
  mutate(draw_type = if_else(grepl('separate', label), 'separate', label)) %>%
  ggplot(aes(x = league_place, y = opponent_quality, color = draw_type)) +
  geom_jitter(width = 0.1, height = 0, alpha = .1) +
  geom_smooth(method = 'lm') +
  labs(title = paste0(py$nsim, ' simulations results - total group opponent quality\ncubed quality matchup weighting'),
       x = 'conference finish place',
       y = 'sum of group stage opponents quality scores') +
  theme(legend.position = "bottom")
## Warning in py_to_r.pandas.core.frame.DataFrame(x): index contains duplicated
## values: row names not set
## `geom_smooth()` using formula 'y ~ x'

# All simulations (cubed weighting): table place against team quality,
# coloured by conference; points are jittered vertically only.
py$sim_standings_cu %>%
  ggplot(aes(x = quality, y = league_place, color = label)) +
  geom_jitter(width = 0, height = .1, alpha = .1) +
  scale_color_brewer(palette = 'Dark2') +
  labs(title = paste0("Team Quality vs League Table Place: ", py$nsim, ' simulations\ncubed quality matchup weighting'),
       x = "team quality",
       y = "table place",
       color = "conference") +
  theme(legend.position = 'bottom')
## Warning in py_to_r.pandas.core.frame.DataFrame(x): index contains duplicated
## values: row names not set

# Rank the teams inside each drawn group of each cubed-weighting simulation
# by quality (1 = strongest).
group_rank <- py$sim_groups_cu %>%
  arrange(sim, label, group, desc(quality)) %>%
  group_by(sim, label, group) %>%
  mutate(group_qual_rank = row_number()) %>%
  ungroup() %>%
  transmute(label,
            sim,
            top_opponent = team,
            group,
            top_quality = quality,
            group_qual_rank)
## Warning in py_to_r.pandas.core.frame.DataFrame(x): index contains duplicated
## values: row names not set
# For every team in every cubed-weighting simulation, find its strongest
# group opponent.
with_top <- py$sim_groups_cu %>%
  # pair each team with every member of its own group
  left_join(group_rank, by = c('sim', 'label', 'group')) %>%
  # a team cannot be its own opponent
  filter(team != top_opponent) %>%
  # keep only the best-ranked remaining opponent for each team
  group_by(team, sim, label, group) %>%
  slice_min(group_qual_rank, n = 1, with_ties = FALSE) %>%
  ungroup()
## Warning in py_to_r.pandas.core.frame.DataFrame(x): index contains duplicated
## values: row names not set
# All simulations (cubed weighting): quality of the strongest group opponent
# faced by the top four finishers of each conference, one colour per draw type.
with_top %>%
  # both per-conference labels contain 'separate'; fold them into one draw type
  mutate(draw_type = if_else(grepl('separate', label), 'separate', label)) %>%
  filter(league_place <= 4) %>%
  ggplot(aes(x = league_place, y = top_quality, color = draw_type)) +
  geom_jitter(width = 0.2, height = 0, alpha = .2) +
  geom_smooth(method = 'lm') +
  scale_color_viridis_d(option = 'inferno') +
  labs(title = paste0(py$nsim, ' simulations results - top group opponent quality for pots 1 and 2\ncubed quality matchup weighting'),
       x = 'conference finish place',
       y = 'top opponent quality score') +
  theme(legend.position = "bottom")
## `geom_smooth()` using formula 'y ~ x'

from math import e
def round_robin_exp(teams,team_qualities):
  """Simulate a home-and-away round robin using e**quality as the weight.

  Same scheme as the linear version, except each team's weight is
  ``e`` raised to its quality (floored at 10).

  Args:
    teams: iterable of team names.
    team_qualities: dict mapping team name to a numeric quality score.

  Returns:
    DataFrame with columns ``home``, ``away``, ``home_points`` and
    ``away_points`` (3 for a win, 1 each for a draw, 0 for a loss), with a
    unique 0..n-1 index. Empty (same columns) when fewer than two distinct
    teams are given.
  """
  # outcome number -> (home points, away points): 0=home win, 1=draw, 2=away win
  points_dict = {0: (3, 0), 1: (1, 1), 2: (0, 3)}
  columns = ['home', 'away', 'home_points', 'away_points']
  games = []
  for team1 in teams:
    for team2 in teams:
      if team1 == team2:
        continue
      # exponential weights, floored at 10 as in the other weighting schemes
      team1_qual = max(e**team_qualities[team1], 10)
      team2_qual = max(e**team_qualities[team2], 10)
      # the draw weight is a random fraction of the two teams' mean weight
      draw_weight = stats.mean([team1_qual, team2_qual]) * np.random.uniform(0, 1)
      weights = [team1_qual, draw_weight, team2_qual]
      total = sum(weights)
      probs = [w / total for w in weights]
      outcome = np.random.choice([0, 1, 2], size=1, replace=False, p=probs)[0]
      home_points, away_points = points_dict[outcome]
      games.append((team1, team2, home_points, away_points))
  # build the frame once instead of concatenating one single-row frame per game;
  # this also avoids pd.concat failing on an empty list when no games are played
  frame = pd.DataFrame(games, columns=columns)
  return frame.astype({'home_points': 'int64', 'away_points': 'int64'})

# repeat the season + draw simulation nsim times (exponential quality weighting)
nsim = 1000
standings_list_exp = []
groups_list_exp = []
# create 18 teams in each conference
east_teams = ['east_'+x for x in string.ascii_uppercase[0:18]]
west_teams = ['west_'+x for x in string.ascii_uppercase[0:18]]
for sim in range(nsim):
  # assign qualities to teams
  east_qualities = assign_qualities(east_teams)
  west_qualities = assign_qualities(west_teams)
  # simulate round-robin league play
  east_results = round_robin_exp(east_teams,east_qualities)
  west_results = round_robin_exp(west_teams,west_qualities)
  # find standings
  east_standings = standings(east_teams,east_results,east_qualities,'east')
  west_standings = standings(west_teams,west_results,west_qualities,'west')
  # simulate draws: one cross-conference draw and one draw per conference
  east_west_draw = draw_groups([east_standings,west_standings],4,'combined')
  east_draw = draw_groups(east_standings,2,'separate-east')
  west_draw = draw_groups(west_standings,2,'separate-west')
  # tag every row with the simulation number (stored as a string)
  standings_temp = pd.concat([east_standings,west_standings])
  standings_temp['sim'] = str(sim)
  standings_list_exp.append(standings_temp)
  groups_temp = pd.concat([east_west_draw,east_draw,west_draw])
  groups_temp['sim'] = str(sim)
  groups_list_exp.append(groups_temp)

# ignore_index gives the stacked frames a unique 0..n-1 index; with repeated
# index labels the R side warns "index contains duplicated values: row names
# not set" every time py$sim_standings_exp / py$sim_groups_exp is converted
sim_standings_exp = pd.concat(standings_list_exp, ignore_index=True)
sim_groups_exp = pd.concat(groups_list_exp, ignore_index=True)
# All simulations (exponential weighting): summed quality of each team's group
# opponents against its conference finishing place, one colour per draw type.
py$sim_groups_exp %>%
  # both per-conference labels contain 'separate'; fold them into one draw type
  mutate(draw_type = if_else(grepl('separate', label), 'separate', label)) %>%
  ggplot(aes(x = league_place, y = opponent_quality, color = draw_type)) +
  geom_jitter(width = 0.1, height = 0, alpha = .1) +
  geom_smooth(method = 'lm') +
  labs(title = paste0(py$nsim, ' simulations results - total group opponent quality\nexponential matchup weighting'),
       x = 'conference finish place',
       y = 'sum of group stage opponents quality scores') +
  theme(legend.position = "bottom")
## Warning in py_to_r.pandas.core.frame.DataFrame(x): index contains duplicated
## values: row names not set
## `geom_smooth()` using formula 'y ~ x'

# All simulations (exponential weighting): table place against team quality,
# coloured by conference; points are jittered vertically only.
py$sim_standings_exp %>%
  ggplot(aes(x = quality, y = league_place, color = label)) +
  geom_jitter(width = 0, height = .1, alpha = .1) +
  scale_color_brewer(palette = 'Dark2') +
  labs(title = paste0("Team Quality vs League Table Place: ", py$nsim, ' simulations\nexponential matchup weighting'),
       x = "team quality",
       y = "table place",
       color = "conference") +
  theme(legend.position = 'bottom')
## Warning in py_to_r.pandas.core.frame.DataFrame(x): index contains duplicated
## values: row names not set

# Rank the teams inside each drawn group of each exponential-weighting
# simulation by quality (1 = strongest).
group_rank <- py$sim_groups_exp %>%
  arrange(sim, label, group, desc(quality)) %>%
  group_by(sim, label, group) %>%
  mutate(group_qual_rank = row_number()) %>%
  ungroup() %>%
  transmute(label,
            sim,
            top_opponent = team,
            group,
            top_quality = quality,
            group_qual_rank)
## Warning in py_to_r.pandas.core.frame.DataFrame(x): index contains duplicated
## values: row names not set
# For every team in every exponential-weighting simulation, find its
# strongest group opponent.
with_top <- py$sim_groups_exp %>%
  # pair each team with every member of its own group
  left_join(group_rank, by = c('sim', 'label', 'group')) %>%
  # a team cannot be its own opponent
  filter(team != top_opponent) %>%
  # keep only the best-ranked remaining opponent for each team
  group_by(team, sim, label, group) %>%
  slice_min(group_qual_rank, n = 1, with_ties = FALSE) %>%
  ungroup()
## Warning in py_to_r.pandas.core.frame.DataFrame(x): index contains duplicated
## values: row names not set
# All simulations (exponential weighting): quality of the strongest group
# opponent faced by the top four finishers of each conference, one colour per
# draw type.
with_top %>%
  # both per-conference labels contain 'separate'; fold them into one draw type
  mutate(draw_type = if_else(grepl('separate', label), 'separate', label)) %>%
  filter(league_place <= 4) %>%
  ggplot(aes(x = league_place, y = top_quality, color = draw_type)) +
  geom_jitter(width = 0.2, height = 0, alpha = .2) +
  geom_smooth(method = 'lm') +
  scale_color_viridis_d(option = 'inferno') +
  labs(title = paste0(py$nsim, ' simulations results - top group opponent quality for pots 1 and 2\nexponential matchup weighting'),
       x = 'conference finish place',
       y = 'top opponent quality score') +
  theme(legend.position = "bottom")
## `geom_smooth()` using formula 'y ~ x'